# -*- coding: utf-8 -*-
"""
Code Introduction:
    This code 
Version History:
    Created: Mon Mar  8 10:57:28 2021
    Current: 

@author: Xing Guo (guoxing.econ@gmail.com)

"""

#%% Setup Working Directory

import os
## Windows System Path
FolderList = [xx+"Dropbox (Bank of Canada)\\Research Projects\\OHANK\\Empirics\\Analysis_Aggregate" \
              for xx in ["D:\\","B:\\","/mnt/b/"]]
for Folder in FolderList:
    if os.path.exists(Folder):
        os.chdir(Folder)    

## Output Folder
OutputFolder = 'TableGraph/'
if not os.path.exists(OutputFolder):
    os.makedirs(OutputFolder)
# End of Section: Setup Working Directory
###############################################################################

#%% Import Moduels

## System Tools

import numpy as np
from collections import OrderedDict
import time
## I/O Tools
import _pickle as pickle
## Data Process Tools
import pandas as pd
# import modin.pandas as pd
import datetime
## Graphs
import matplotlib.pyplot as plt
import matplotlib.backends.backend_pdf as figpdf
## Statistical Tools
import statsmodels.formula.api as sm
from statsmodels.tsa.api import VAR
from scipy.stats import mstats
from scipy.interpolate import interp1d
import statsmodels.api as SMAPI
from statsmodels.tsa.tsatools import detrend as DeTrend
from statsmodels.tsa.filters.hp_filter import hpfilter as HPfilter
from statsmodels.tsa.filters.bk_filter import bkfilter as BKfilter
## Database API
from fredapi import Fred
## Numerical API
from scipy.interpolate import interp1d
## Regular Expression API
import re
import multiprocessing as mp

idx = pd.IndexSlice

# from Toolbox_Graph import Graph_PDF, GraphSetup, MultiLine_SinglePlot, NBER_RecessionBar, IRF_SinglePlot
# End of Section: Import Moduels
###############################################################################

#%% Tradable vs. Non-tradable: 
    
### Employment Share


DS_Emp = pd.read_csv("../Data/Aggregate/Employment.csv")

Var_Sec = 'North American Industry Classification System (NAICS)'
Var_VA = 'VALUE'
Var_Time ='REF_DATE'

DS_Emp = DS_Emp.pivot(index=Var_Time, columns=Var_Sec, values=Var_VA)

SecList_T = ['Forestry, fishing, mining, quarrying, oil and gas [21, 113-114, 1153, 2100]', \
             'Manufacturing [31-33]','Agriculture [111-112, 1100, 1151-1152]', \
             'Information, culture and recreation [51, 71]', \
             'Wholesale trade [41]', \
             'Professional, scientific and technical services [54]', \
             'Finance and insurance [52]']
SecList_All = 'Total, all industries'

SecList_N = list(set(DS_Emp.columns)-set(SecList_T)-set([SecList_All]))

DS_Emp['Agg_T'] = DS_Emp[SecList_T].sum(axis=1)
DS_Emp['Agg_N'] = DS_Emp[SecList_N].sum(axis=1)
DS_Emp['Agg'] = DS_Emp[SecList_All]

Share_T_Employment = DS_Emp[SecList_T+SecList_N+['Agg_T']].div(DS_Emp['Agg'],axis=0)

Share_Employment = pd.concat([Share_T_Employment[SecList_T].mean(),Share_T_Employment[SecList_N].mean()],axis=0,keys=['T','N']) \
             .reset_index().rename(columns={'level_0': 'TN',0: 'Pct'}) \
             .sort_values(['TN','Pct']).set_index(['TN',Var_Sec])['Pct']

ExcelWriter = pd.ExcelWriter("TableGraph//Share_T_N.xlsx")
Share_Employment.to_excel(ExcelWriter,sheet_name='Employment')
### Value Added Share
DS_Temp = pd.read_csv("../Data/Aggregate/ValueAdded.csv")

Var_Sec = 'North American Industry Classification System (NAICS)'
Var_Val = 'VALUE'
Var_Time ='REF_DATE'
Var_Stat = 'Multifactor productivity and related variables'

DS_GDP = DS_Temp[DS_Temp[Var_Stat]=='Gross domestic product (GDP)'] \
        .pivot(index=Var_Time, columns=Var_Sec, values=Var_Val)
DS_LaborIncome = DS_Temp[DS_Temp[Var_Stat]=='Labour compensation'] \
                .pivot(index=Var_Time, columns=Var_Sec, values=Var_Val)
SecList_T = ['Agriculture, forestry, fishing and hunting [11]', \
             'Manufacturing [31-33]','Mining and oil and gas extraction [21]', \
             'Information and cultural industries [51]', \
             'Wholesale trade [41]', \
             'Professional, scientific and technical services [54]', \
             'Arts, entertainment and recreation [71]' \
             ]
SecList_All = 'Business sector'

SecList_N = list(set(DS_GDP.columns)-set(SecList_T)-set([SecList_All]))

DS_GDP['Agg_T'] = DS_GDP[SecList_T].sum(axis=1)
DS_GDP['Agg'] = DS_GDP[SecList_All]

DS_LaborIncome['Agg_T'] = DS_LaborIncome[SecList_T].sum(axis=1)
DS_LaborIncome['Agg'] = DS_LaborIncome[SecList_All]

Share_T_GDP = DS_GDP[SecList_T+SecList_N+['Agg_T']].div(DS_GDP['Agg'],axis=0)
Share_T_LaborIncome = DS_LaborIncome[SecList_T+SecList_N+['Agg_T']].div(DS_LaborIncome['Agg'],axis=0)

Share_GDP = pd.concat([Share_T_GDP[SecList_T].mean(),Share_T_GDP[SecList_N].mean()],axis=0,keys=['T','N']) \
             .reset_index().rename(columns={'level_0': 'TN',0: 'Pct'}) \
             .sort_values(['TN','Pct']).set_index(['TN',Var_Sec])['Pct']
             
Share_LaborIncome = pd.concat([Share_T_LaborIncome[SecList_T].mean(),Share_T_LaborIncome[SecList_N].mean()],axis=0,keys=['T','N']) \
             .reset_index().rename(columns={'level_0': 'TN',0: 'Pct'}) \
             .sort_values(['TN','Pct']).set_index(['TN',Var_Sec])['Pct']

Share_NIPA = pd.concat([Share_GDP,Share_LaborIncome[Share_GDP.index]],axis=1,keys=['GDP','LaborIncome'])

Share_NIPA.to_excel(ExcelWriter,sheet_name='NIPA')

Share = pd.concat([Share_Employment,Share_GDP,Share_LaborIncome[Share_GDP.index]], \
                  axis=1,keys=['Employment','GDP','LaborIncome'])

Share.to_excel(ExcelWriter,sheet_name='All')
ExcelWriter.close()
# End of Section: 
###############################################################################

#%% Household Income Composition

DS_Temp = pd.read_csv("Data/Aggregate/FamilyIncome.csv")

Var_IncomeType = 'Income concept'
Var_Val = 'VALUE'
Var_Time ='REF_DATE'

DS_Income = DS_Temp.pivot(index=Var_Time, columns=Var_IncomeType, values=Var_Val)

DS_Income['TaxRatio'] = DS_Income['Average income tax'] \
                        /DS_Income['Average market income']
DS_Income['PostTaxRatio'] = DS_Income['Average after-tax income'] \
                            /DS_Income['Average market income']
DS_Income['TransferRatio'] =  DS_Income['Average government transfers'] \
                              /DS_Income['Average market income']
                              
DS_Income[['TaxRatio','TransferRatio']].mean()

#%% Government Debt as a Ratio to GDP
fred = Fred(api_key='86cde3dec5dda5ffca44b58f01838b1e')
DS_GovDebt2GDP = fred.get_series('GGGDTACAA188N')

#%% Openness Measure
fred = Fred(api_key='86cde3dec5dda5ffca44b58f01838b1e')
CA_Openness = fred.get_series('OPENRPCAA156NUPN')

CA_Export = fred.get_series('CANEXPORTADSMEI')
CA_Import = fred.get_series('CANIMPORTADSMEI')
CA_GDP = fred.get_series('CANGDPNADSMEI')

CA_Openness = pd.concat([CA_GDP,CA_Import,CA_Export], \
                        keys=['GDP','Import','Export'], \
                        axis=1)
CA_Openness['OpenIdx'] = ( CA_Openness['Import']+CA_Openness['Export'] )/ \
                         CA_Openness['GDP']
CA_Openness['OpenIdx_Import'] = CA_Openness['Import']/CA_Openness['GDP']
CA_Openness['OpenIdx_Export'] = CA_Openness['Export']/CA_Openness['GDP']
CA_Openness[['OpenIdx','OpenIdx_Import','OpenIdx_Export']].plot()                 